{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Data Manipulation and Analysis with Pandas\n",
    "Data manipulation and analysis are key tasks in any data science or data analysis project. Pandas provides a wide range of functions for data manipulation and analysis, making it easier to clean, transform, and extract insights from data. In this lesson, we will cover various data manipulation and analysis techniques using Pandas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>28.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>754.0</td>\n",
       "      <td>East</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>39.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>110.0</td>\n",
       "      <td>North</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-03</td>\n",
       "      <td>C</td>\n",
       "      <td>32.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>398.0</td>\n",
       "      <td>East</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>522.0</td>\n",
       "      <td>East</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-05</td>\n",
       "      <td>B</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>869.0</td>\n",
       "      <td>North</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date Category  Value   Product  Sales Region\n",
       "0  2023-01-01        A   28.0  Product1  754.0   East\n",
       "1  2023-01-02        B   39.0  Product3  110.0  North\n",
       "2  2023-01-03        C   32.0  Product2  398.0   East\n",
       "3  2023-01-04        B    8.0  Product1  522.0   East\n",
       "4  2023-01-05        B   26.0  Product3  869.0  North"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.read_csv('data.csv')\n",
    "## fecth the first 5 rows\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2023-02-15</td>\n",
       "      <td>B</td>\n",
       "      <td>99.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>599.0</td>\n",
       "      <td>West</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>2023-02-16</td>\n",
       "      <td>B</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>938.0</td>\n",
       "      <td>South</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>2023-02-17</td>\n",
       "      <td>B</td>\n",
       "      <td>69.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>143.0</td>\n",
       "      <td>West</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>2023-02-18</td>\n",
       "      <td>C</td>\n",
       "      <td>65.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>182.0</td>\n",
       "      <td>North</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2023-02-19</td>\n",
       "      <td>C</td>\n",
       "      <td>11.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>708.0</td>\n",
       "      <td>North</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Date Category  Value   Product  Sales Region\n",
       "45  2023-02-15        B   99.0  Product2  599.0   West\n",
       "46  2023-02-16        B    6.0  Product1  938.0  South\n",
       "47  2023-02-17        B   69.0  Product3  143.0   West\n",
       "48  2023-02-18        C   65.0  Product3  182.0  North\n",
       "49  2023-02-19        C   11.0  Product3  708.0  North"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Value</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>47.000000</td>\n",
       "      <td>46.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>51.744681</td>\n",
       "      <td>557.130435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>29.050532</td>\n",
       "      <td>274.598584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>108.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>27.500000</td>\n",
       "      <td>339.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>54.000000</td>\n",
       "      <td>591.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>70.000000</td>\n",
       "      <td>767.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>99.000000</td>\n",
       "      <td>992.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           Value       Sales\n",
       "count  47.000000   46.000000\n",
       "mean   51.744681  557.130435\n",
       "std    29.050532  274.598584\n",
       "min     2.000000  108.000000\n",
       "25%    27.500000  339.000000\n",
       "50%    54.000000  591.500000\n",
       "75%    70.000000  767.500000\n",
       "max    99.000000  992.000000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Date         object\n",
       "Category     object\n",
       "Value       float64\n",
       "Product      object\n",
       "Sales       float64\n",
       "Region       object\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Date        False\n",
       "Category    False\n",
       "Value        True\n",
       "Product     False\n",
       "Sales        True\n",
       "Region      False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Handling Missing Values\n",
    "df.isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Date        0\n",
       "Category    0\n",
       "Value       3\n",
       "Product     0\n",
       "Sales       4\n",
       "Region      0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filled=df.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sales_fillNA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>28.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>754.0</td>\n",
       "      <td>East</td>\n",
       "      <td>754.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>39.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>110.0</td>\n",
       "      <td>North</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-03</td>\n",
       "      <td>C</td>\n",
       "      <td>32.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>398.0</td>\n",
       "      <td>East</td>\n",
       "      <td>398.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>522.0</td>\n",
       "      <td>East</td>\n",
       "      <td>522.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-05</td>\n",
       "      <td>B</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>869.0</td>\n",
       "      <td>North</td>\n",
       "      <td>869.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2023-01-06</td>\n",
       "      <td>B</td>\n",
       "      <td>54.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>192.0</td>\n",
       "      <td>West</td>\n",
       "      <td>192.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2023-01-07</td>\n",
       "      <td>A</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>936.0</td>\n",
       "      <td>East</td>\n",
       "      <td>936.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2023-01-08</td>\n",
       "      <td>C</td>\n",
       "      <td>89.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>488.0</td>\n",
       "      <td>West</td>\n",
       "      <td>488.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2023-01-09</td>\n",
       "      <td>C</td>\n",
       "      <td>37.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>772.0</td>\n",
       "      <td>West</td>\n",
       "      <td>772.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2023-01-10</td>\n",
       "      <td>A</td>\n",
       "      <td>22.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>834.0</td>\n",
       "      <td>West</td>\n",
       "      <td>834.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2023-01-11</td>\n",
       "      <td>B</td>\n",
       "      <td>7.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>842.0</td>\n",
       "      <td>North</td>\n",
       "      <td>842.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2023-01-12</td>\n",
       "      <td>B</td>\n",
       "      <td>60.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>West</td>\n",
       "      <td>557.130435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2023-01-13</td>\n",
       "      <td>A</td>\n",
       "      <td>70.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>628.0</td>\n",
       "      <td>South</td>\n",
       "      <td>628.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2023-01-14</td>\n",
       "      <td>A</td>\n",
       "      <td>69.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>423.0</td>\n",
       "      <td>East</td>\n",
       "      <td>423.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2023-01-15</td>\n",
       "      <td>A</td>\n",
       "      <td>47.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>893.0</td>\n",
       "      <td>West</td>\n",
       "      <td>893.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2023-01-16</td>\n",
       "      <td>C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Product1</td>\n",
       "      <td>895.0</td>\n",
       "      <td>North</td>\n",
       "      <td>895.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2023-01-17</td>\n",
       "      <td>C</td>\n",
       "      <td>93.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>511.0</td>\n",
       "      <td>South</td>\n",
       "      <td>511.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2023-01-18</td>\n",
       "      <td>C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Product1</td>\n",
       "      <td>108.0</td>\n",
       "      <td>West</td>\n",
       "      <td>108.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2023-01-19</td>\n",
       "      <td>A</td>\n",
       "      <td>31.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>578.0</td>\n",
       "      <td>West</td>\n",
       "      <td>578.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2023-01-20</td>\n",
       "      <td>A</td>\n",
       "      <td>59.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>736.0</td>\n",
       "      <td>East</td>\n",
       "      <td>736.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2023-01-21</td>\n",
       "      <td>C</td>\n",
       "      <td>82.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>606.0</td>\n",
       "      <td>South</td>\n",
       "      <td>606.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>2023-01-22</td>\n",
       "      <td>C</td>\n",
       "      <td>37.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>992.0</td>\n",
       "      <td>South</td>\n",
       "      <td>992.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2023-01-23</td>\n",
       "      <td>B</td>\n",
       "      <td>62.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>942.0</td>\n",
       "      <td>North</td>\n",
       "      <td>942.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2023-01-24</td>\n",
       "      <td>C</td>\n",
       "      <td>92.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>342.0</td>\n",
       "      <td>West</td>\n",
       "      <td>342.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2023-01-25</td>\n",
       "      <td>A</td>\n",
       "      <td>24.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>458.0</td>\n",
       "      <td>East</td>\n",
       "      <td>458.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2023-01-26</td>\n",
       "      <td>C</td>\n",
       "      <td>95.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>584.0</td>\n",
       "      <td>West</td>\n",
       "      <td>584.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2023-01-27</td>\n",
       "      <td>C</td>\n",
       "      <td>71.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>619.0</td>\n",
       "      <td>North</td>\n",
       "      <td>619.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2023-01-28</td>\n",
       "      <td>C</td>\n",
       "      <td>56.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>224.0</td>\n",
       "      <td>North</td>\n",
       "      <td>224.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2023-01-29</td>\n",
       "      <td>B</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Product3</td>\n",
       "      <td>617.0</td>\n",
       "      <td>North</td>\n",
       "      <td>617.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2023-01-30</td>\n",
       "      <td>C</td>\n",
       "      <td>51.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>737.0</td>\n",
       "      <td>South</td>\n",
       "      <td>737.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2023-01-31</td>\n",
       "      <td>B</td>\n",
       "      <td>50.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>735.0</td>\n",
       "      <td>West</td>\n",
       "      <td>735.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>2023-02-01</td>\n",
       "      <td>A</td>\n",
       "      <td>17.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>189.0</td>\n",
       "      <td>West</td>\n",
       "      <td>189.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>2023-02-02</td>\n",
       "      <td>B</td>\n",
       "      <td>63.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>338.0</td>\n",
       "      <td>South</td>\n",
       "      <td>338.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>2023-02-03</td>\n",
       "      <td>C</td>\n",
       "      <td>27.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>East</td>\n",
       "      <td>557.130435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2023-02-04</td>\n",
       "      <td>C</td>\n",
       "      <td>70.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>669.0</td>\n",
       "      <td>West</td>\n",
       "      <td>669.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>2023-02-05</td>\n",
       "      <td>B</td>\n",
       "      <td>60.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>West</td>\n",
       "      <td>557.130435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>2023-02-06</td>\n",
       "      <td>C</td>\n",
       "      <td>36.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>177.0</td>\n",
       "      <td>East</td>\n",
       "      <td>177.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>2023-02-07</td>\n",
       "      <td>C</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>North</td>\n",
       "      <td>557.130435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>2023-02-08</td>\n",
       "      <td>C</td>\n",
       "      <td>94.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>408.0</td>\n",
       "      <td>South</td>\n",
       "      <td>408.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>2023-02-09</td>\n",
       "      <td>A</td>\n",
       "      <td>62.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>155.0</td>\n",
       "      <td>West</td>\n",
       "      <td>155.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>2023-02-10</td>\n",
       "      <td>B</td>\n",
       "      <td>15.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>578.0</td>\n",
       "      <td>East</td>\n",
       "      <td>578.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>2023-02-11</td>\n",
       "      <td>C</td>\n",
       "      <td>97.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>256.0</td>\n",
       "      <td>East</td>\n",
       "      <td>256.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>2023-02-12</td>\n",
       "      <td>A</td>\n",
       "      <td>93.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>164.0</td>\n",
       "      <td>West</td>\n",
       "      <td>164.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>2023-02-13</td>\n",
       "      <td>A</td>\n",
       "      <td>43.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>949.0</td>\n",
       "      <td>East</td>\n",
       "      <td>949.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>2023-02-14</td>\n",
       "      <td>A</td>\n",
       "      <td>96.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>830.0</td>\n",
       "      <td>East</td>\n",
       "      <td>830.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2023-02-15</td>\n",
       "      <td>B</td>\n",
       "      <td>99.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>599.0</td>\n",
       "      <td>West</td>\n",
       "      <td>599.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>2023-02-16</td>\n",
       "      <td>B</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>938.0</td>\n",
       "      <td>South</td>\n",
       "      <td>938.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>2023-02-17</td>\n",
       "      <td>B</td>\n",
       "      <td>69.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>143.0</td>\n",
       "      <td>West</td>\n",
       "      <td>143.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>2023-02-18</td>\n",
       "      <td>C</td>\n",
       "      <td>65.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>182.0</td>\n",
       "      <td>North</td>\n",
       "      <td>182.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2023-02-19</td>\n",
       "      <td>C</td>\n",
       "      <td>11.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>708.0</td>\n",
       "      <td>North</td>\n",
       "      <td>708.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Date Category  Value   Product  Sales Region  Sales_fillNA\n",
       "0   2023-01-01        A   28.0  Product1  754.0   East    754.000000\n",
       "1   2023-01-02        B   39.0  Product3  110.0  North    110.000000\n",
       "2   2023-01-03        C   32.0  Product2  398.0   East    398.000000\n",
       "3   2023-01-04        B    8.0  Product1  522.0   East    522.000000\n",
       "4   2023-01-05        B   26.0  Product3  869.0  North    869.000000\n",
       "5   2023-01-06        B   54.0  Product3  192.0   West    192.000000\n",
       "6   2023-01-07        A   16.0  Product1  936.0   East    936.000000\n",
       "7   2023-01-08        C   89.0  Product1  488.0   West    488.000000\n",
       "8   2023-01-09        C   37.0  Product3  772.0   West    772.000000\n",
       "9   2023-01-10        A   22.0  Product2  834.0   West    834.000000\n",
       "10  2023-01-11        B    7.0  Product1  842.0  North    842.000000\n",
       "11  2023-01-12        B   60.0  Product2    NaN   West    557.130435\n",
       "12  2023-01-13        A   70.0  Product3  628.0  South    628.000000\n",
       "13  2023-01-14        A   69.0  Product1  423.0   East    423.000000\n",
       "14  2023-01-15        A   47.0  Product2  893.0   West    893.000000\n",
       "15  2023-01-16        C    NaN  Product1  895.0  North    895.000000\n",
       "16  2023-01-17        C   93.0  Product2  511.0  South    511.000000\n",
       "17  2023-01-18        C    NaN  Product1  108.0   West    108.000000\n",
       "18  2023-01-19        A   31.0  Product2  578.0   West    578.000000\n",
       "19  2023-01-20        A   59.0  Product1  736.0   East    736.000000\n",
       "20  2023-01-21        C   82.0  Product3  606.0  South    606.000000\n",
       "21  2023-01-22        C   37.0  Product2  992.0  South    992.000000\n",
       "22  2023-01-23        B   62.0  Product3  942.0  North    942.000000\n",
       "23  2023-01-24        C   92.0  Product2  342.0   West    342.000000\n",
       "24  2023-01-25        A   24.0  Product2  458.0   East    458.000000\n",
       "25  2023-01-26        C   95.0  Product1  584.0   West    584.000000\n",
       "26  2023-01-27        C   71.0  Product2  619.0  North    619.000000\n",
       "27  2023-01-28        C   56.0  Product2  224.0  North    224.000000\n",
       "28  2023-01-29        B    NaN  Product3  617.0  North    617.000000\n",
       "29  2023-01-30        C   51.0  Product2  737.0  South    737.000000\n",
       "30  2023-01-31        B   50.0  Product3  735.0   West    735.000000\n",
       "31  2023-02-01        A   17.0  Product2  189.0   West    189.000000\n",
       "32  2023-02-02        B   63.0  Product3  338.0  South    338.000000\n",
       "33  2023-02-03        C   27.0  Product3    NaN   East    557.130435\n",
       "34  2023-02-04        C   70.0  Product3  669.0   West    669.000000\n",
       "35  2023-02-05        B   60.0  Product2    NaN   West    557.130435\n",
       "36  2023-02-06        C   36.0  Product3  177.0   East    177.000000\n",
       "37  2023-02-07        C    2.0  Product1    NaN  North    557.130435\n",
       "38  2023-02-08        C   94.0  Product1  408.0  South    408.000000\n",
       "39  2023-02-09        A   62.0  Product1  155.0   West    155.000000\n",
       "40  2023-02-10        B   15.0  Product1  578.0   East    578.000000\n",
       "41  2023-02-11        C   97.0  Product1  256.0   East    256.000000\n",
       "42  2023-02-12        A   93.0  Product3  164.0   West    164.000000\n",
       "43  2023-02-13        A   43.0  Product3  949.0   East    949.000000\n",
       "44  2023-02-14        A   96.0  Product3  830.0   East    830.000000\n",
       "45  2023-02-15        B   99.0  Product2  599.0   West    599.000000\n",
       "46  2023-02-16        B    6.0  Product1  938.0  South    938.000000\n",
       "47  2023-02-17        B   69.0  Product3  143.0   West    143.000000\n",
       "48  2023-02-18        C   65.0  Product3  182.0  North    182.000000\n",
       "49  2023-02-19        C   11.0  Product3  708.0  North    708.000000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### filling missing values with the mean of the column\n",
    "df['Sales_fillNA']=df['Sales'].fillna(df['Sales'].mean())\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Date             object\n",
       "Category         object\n",
       "Value           float64\n",
       "Product          object\n",
       "Sales           float64\n",
       "Region           object\n",
       "Sales_fillNA    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sales Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sales_fillNA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>28.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>754.0</td>\n",
       "      <td>East</td>\n",
       "      <td>754.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>39.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>110.0</td>\n",
       "      <td>North</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-03</td>\n",
       "      <td>C</td>\n",
       "      <td>32.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>398.0</td>\n",
       "      <td>East</td>\n",
       "      <td>398.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>522.0</td>\n",
       "      <td>East</td>\n",
       "      <td>522.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-05</td>\n",
       "      <td>B</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>869.0</td>\n",
       "      <td>North</td>\n",
       "      <td>869.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sales Date Category  Value   Product  Sales Region  Sales_fillNA\n",
       "0  2023-01-01        A   28.0  Product1  754.0   East         754.0\n",
       "1  2023-01-02        B   39.0  Product3  110.0  North         110.0\n",
       "2  2023-01-03        C   32.0  Product2  398.0   East         398.0\n",
       "3  2023-01-04        B    8.0  Product1  522.0   East         522.0\n",
       "4  2023-01-05        B   26.0  Product3  869.0  North         869.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Renaming Columns\n",
    "df=df.rename(columns={'Sale Date':'Sales Date'})\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sales Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sales_fillNA</th>\n",
       "      <th>Value_new</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>28.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>754.0</td>\n",
       "      <td>East</td>\n",
       "      <td>754.0</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>39.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>110.0</td>\n",
       "      <td>North</td>\n",
       "      <td>110.0</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-03</td>\n",
       "      <td>C</td>\n",
       "      <td>32.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>398.0</td>\n",
       "      <td>East</td>\n",
       "      <td>398.0</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>522.0</td>\n",
       "      <td>East</td>\n",
       "      <td>522.0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-05</td>\n",
       "      <td>B</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>869.0</td>\n",
       "      <td>North</td>\n",
       "      <td>869.0</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sales Date Category  Value   Product  Sales Region  Sales_fillNA  Value_new\n",
       "0  2023-01-01        A   28.0  Product1  754.0   East         754.0         28\n",
       "1  2023-01-02        B   39.0  Product3  110.0  North         110.0         39\n",
       "2  2023-01-03        C   32.0  Product2  398.0   East         398.0         32\n",
       "3  2023-01-04        B    8.0  Product1  522.0   East         522.0          8\n",
       "4  2023-01-05        B   26.0  Product3  869.0  North         869.0         26"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## change datatypes\n",
    "df['Value_new']=df['Value'].fillna(df['Value'].mean()).astype(int)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sales Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sales_fillNA</th>\n",
       "      <th>Value_new</th>\n",
       "      <th>New Value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>28.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>754.0</td>\n",
       "      <td>East</td>\n",
       "      <td>754.0</td>\n",
       "      <td>28</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>39.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>110.0</td>\n",
       "      <td>North</td>\n",
       "      <td>110.0</td>\n",
       "      <td>39</td>\n",
       "      <td>78.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-03</td>\n",
       "      <td>C</td>\n",
       "      <td>32.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>398.0</td>\n",
       "      <td>East</td>\n",
       "      <td>398.0</td>\n",
       "      <td>32</td>\n",
       "      <td>64.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>522.0</td>\n",
       "      <td>East</td>\n",
       "      <td>522.0</td>\n",
       "      <td>8</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-05</td>\n",
       "      <td>B</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>869.0</td>\n",
       "      <td>North</td>\n",
       "      <td>869.0</td>\n",
       "      <td>26</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sales Date Category  Value   Product  Sales Region  Sales_fillNA  \\\n",
       "0  2023-01-01        A   28.0  Product1  754.0   East         754.0   \n",
       "1  2023-01-02        B   39.0  Product3  110.0  North         110.0   \n",
       "2  2023-01-03        C   32.0  Product2  398.0   East         398.0   \n",
       "3  2023-01-04        B    8.0  Product1  522.0   East         522.0   \n",
       "4  2023-01-05        B   26.0  Product3  869.0  North         869.0   \n",
       "\n",
       "   Value_new  New Value  \n",
       "0         28       56.0  \n",
       "1         39       78.0  \n",
       "2         32       64.0  \n",
       "3          8       16.0  \n",
       "4         26       52.0  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['New Value']=df['Value'].apply(lambda x:x*2)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sales Date</th>\n",
       "      <th>Category</th>\n",
       "      <th>Value</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sales_fillNA</th>\n",
       "      <th>Value_new</th>\n",
       "      <th>New Value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2023-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>28.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>754.0</td>\n",
       "      <td>East</td>\n",
       "      <td>754.0</td>\n",
       "      <td>28</td>\n",
       "      <td>56.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2023-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>39.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>110.0</td>\n",
       "      <td>North</td>\n",
       "      <td>110.0</td>\n",
       "      <td>39</td>\n",
       "      <td>78.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2023-01-03</td>\n",
       "      <td>C</td>\n",
       "      <td>32.0</td>\n",
       "      <td>Product2</td>\n",
       "      <td>398.0</td>\n",
       "      <td>East</td>\n",
       "      <td>398.0</td>\n",
       "      <td>32</td>\n",
       "      <td>64.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2023-01-04</td>\n",
       "      <td>B</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Product1</td>\n",
       "      <td>522.0</td>\n",
       "      <td>East</td>\n",
       "      <td>522.0</td>\n",
       "      <td>8</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2023-01-05</td>\n",
       "      <td>B</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Product3</td>\n",
       "      <td>869.0</td>\n",
       "      <td>North</td>\n",
       "      <td>869.0</td>\n",
       "      <td>26</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Sales Date Category  Value   Product  Sales Region  Sales_fillNA  \\\n",
       "0  2023-01-01        A   28.0  Product1  754.0   East         754.0   \n",
       "1  2023-01-02        B   39.0  Product3  110.0  North         110.0   \n",
       "2  2023-01-03        C   32.0  Product2  398.0   East         398.0   \n",
       "3  2023-01-04        B    8.0  Product1  522.0   East         522.0   \n",
       "4  2023-01-05        B   26.0  Product3  869.0  North         869.0   \n",
       "\n",
       "   Value_new  New Value  \n",
       "0         28       56.0  \n",
       "1         39       78.0  \n",
       "2         32       64.0  \n",
       "3          8       16.0  \n",
       "4         26       52.0  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Data Aggregating And Grouping\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Product\n",
      "Product1    46.214286\n",
      "Product2    52.800000\n",
      "Product3    55.166667\n",
      "Name: Value, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "grouped_mean=df.groupby('Product')['Value'].mean()\n",
    "print(grouped_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Product   Region\n",
      "Product1  East      292.0\n",
      "          North       9.0\n",
      "          South     100.0\n",
      "          West      246.0\n",
      "Product2  East       56.0\n",
      "          North     127.0\n",
      "          South     181.0\n",
      "          West      428.0\n",
      "Product3  East      202.0\n",
      "          North     203.0\n",
      "          South     215.0\n",
      "          West      373.0\n",
      "Name: Value, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "grouped_sum=df.groupby(['Product','Region'])['Value'].sum()\n",
    "print(grouped_sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Product   Region\n",
       "Product1  East      41.714286\n",
       "          North      4.500000\n",
       "          South     50.000000\n",
       "          West      82.000000\n",
       "Product2  East      28.000000\n",
       "          North     63.500000\n",
       "          South     60.333333\n",
       "          West      53.500000\n",
       "Product3  East      50.500000\n",
       "          North     40.600000\n",
       "          South     71.666667\n",
       "          West      62.166667\n",
       "Name: Value, dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['Product','Region'])['Value'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Region</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>East</th>\n",
       "      <td>42.307692</td>\n",
       "      <td>550.0</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North</th>\n",
       "      <td>37.666667</td>\n",
       "      <td>339.0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>South</th>\n",
       "      <td>62.000000</td>\n",
       "      <td>496.0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West</th>\n",
       "      <td>61.588235</td>\n",
       "      <td>1047.0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             mean     sum  count\n",
       "Region                          \n",
       "East    42.307692   550.0     13\n",
       "North   37.666667   339.0      9\n",
       "South   62.000000   496.0      8\n",
       "West    61.588235  1047.0     17"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## aggregate multiple functions\n",
    "groudped_agg=df.groupby('Region')['Value'].agg(['mean','sum','count'])\n",
    "groudped_agg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Merging and joining Dataframes\n",
    "# Create sample DataFrames\n",
    "df1 = pd.DataFrame({'Key': ['A', 'B', 'C'], 'Value1': [1, 2, 3]})\n",
    "df2 = pd.DataFrame({'Key': ['A', 'B', 'D'], 'Value2': [4, 5, 6]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Key</th>\n",
       "      <th>Value1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Key  Value1\n",
       "0   A       1\n",
       "1   B       2\n",
       "2   C       3"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Key</th>\n",
       "      <th>Value2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Key  Value2\n",
       "0   A       4\n",
       "1   B       5\n",
       "2   D       6"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Key</th>\n",
       "      <th>Value1</th>\n",
       "      <th>Value2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Key  Value1  Value2\n",
       "0   A       1       4\n",
       "1   B       2       5"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Merge Datafranme on the 'Key Columns'\n",
    "pd.merge(df1,df2,on=\"Key\",how=\"inner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Key</th>\n",
       "      <th>Value1</th>\n",
       "      <th>Value2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Key  Value1  Value2\n",
       "0   A     1.0     4.0\n",
       "1   B     2.0     5.0\n",
       "2   C     3.0     NaN\n",
       "3   D     NaN     6.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df1,df2,on=\"Key\",how=\"outer\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Key</th>\n",
       "      <th>Value1</th>\n",
       "      <th>Value2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Key  Value1  Value2\n",
       "0   A       1     4.0\n",
       "1   B       2     5.0\n",
       "2   C       3     NaN"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df1,df2,on=\"Key\",how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Key</th>\n",
       "      <th>Value1</th>\n",
       "      <th>Value2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Key  Value1  Value2\n",
       "0   A     1.0       4\n",
       "1   B     2.0       5\n",
       "2   D     NaN       6"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df1,df2,on=\"Key\",how=\"right\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
